5  Subgroup analysis

Workflow to establish a subgroup analysis after multiple imputation and propensity Score matching

Author

Janick Weberpals, RPh, PhD

Published

September 19, 2024

This is a reproducible example on how to establish a workflow to run a subgroup analysis after multiple imputation and propensity Score matching.

Load packages:

library(dplyr)
library(survival)
library(mice)
library(MatchThem)
library(survey)
library(here)
library(gtsummary)
library(parallelly)
library(ranger)
library(furrr)

source(here("functions", "source_encore.io_functions.R"))

# track time
runtime <- tictoc::tic()

5.1 About

This script is adapted from Noah Greifer’s highly recommended blog post on “Subgroup Analysis After Propensity Score Matching Using R”.

For a more formal manuscript on subgroup analysis with propensity scores, see Green and Stuart.(Green and Stuart 2014)

5.2 Data generation

We again use the simulate_flaura() function to simulate a realistic oncology comparative effectiveness cohort analytic dataset.

# load example dataset with missing observations
data_miss <- simulate_flaura(
  n_total = 3500, 
  seed = 42, 
  include_id = FALSE, 
  imposeNA = TRUE
  )

covariates_for_ps <- setdiff(covariates_for_ps, "dem_sex_cont")

5.3 Moderator covariate

In this example, we assume heterogeneous treatment effect by sex and we aim to assess the average treatment effect among the treated for female and male patients separately. The effect size is time to all-cause mortality. In this dataset, sex is encoded with a binary covariate with 0 = female and 1 = male.

table(data_miss$dem_sex_cont)

   0    1 
2354 1146 

5.4 Multiple imputation

Both the imputation and propensity score step Multiple imputation using mice:

# impute data
female_imp <- futuremice(
  parallelseed = 42,
  n.core = parallel::detectCores()-1,
  data = data_miss |> filter(dem_sex_cont == 0),
  method = "rf",
  m = 10,
  print = FALSE
  )
Warning: Number of logged events: 1
Warning: Number of logged events: 1
Warning: Number of logged events: 1
male_imp <- futuremice(
  parallelseed = 42,
  n.core = parallel::detectCores()-1,
  data = data_miss |> filter(dem_sex_cont == 1),
  method = "rf",
  m = 10,
  print = FALSE
  )
Warning: Number of logged events: 1
Warning: Number of logged events: 1
Warning: Number of logged events: 1

5.5 Propensity score matching and weighting

Apply propensity score matching and weighting with replacement within in each imputed dataset.

# apply propensity score matching on mids object
ps_form <- as.formula(paste("treat ~", paste(covariates_for_ps, collapse = " + ")))
ps_form
treat ~ dem_age_index_cont + c_smoking_history + c_number_met_sites + 
    c_hemoglobin_g_dl_cont + c_urea_nitrogen_mg_dl_cont + c_platelets_10_9_l_cont + 
    c_calcium_mg_dl_cont + c_glucose_mg_dl_cont + c_lymphocyte_leukocyte_ratio_cont + 
    c_alp_u_l_cont + c_protein_g_l_cont + c_alt_u_l_cont + c_albumin_g_l_cont + 
    c_bilirubin_mg_dl_cont + c_chloride_mmol_l_cont + c_monocytes_10_9_l_cont + 
    c_eosinophils_leukocytes_ratio_cont + c_ldh_u_l_cont + c_hr_cont + 
    c_sbp_cont + c_oxygen_cont + c_ecog_cont + c_neutrophil_lymphocyte_ratio_cont + 
    c_bmi_cont + c_ast_alt_ratio_cont + c_stage_initial_dx_cont + 
    dem_race + dem_region + dem_ses + c_time_dx_to_index

5.5.1 Matching

match_within_strata <- function(i, imputed_data = NULL, ps_formula = NULL){
  
  matched <- MatchIt::matchit(
    formula = ps_formula, 
    data = mice::complete(imputed_data, i),
    method = "nearest",
    caliper = 0.01,
    ratio = 1,
    replace = F
    ) |> 
    MatchIt::match.data()
  
  return(matched)
  
}

female_matched <- lapply(
  X = 1:female_imp$m, 
  FUN = match_within_strata, 
  imputed_data = female_imp,
  ps_formula = ps_form
  )

male_matched <- lapply(
  X = 1:male_imp$m, 
  FUN = match_within_strata, 
  imputed_data = male_imp,
  ps_formula = ps_form
  )

# combine the mth imputed and matched datasets
combine_list <- function(i, data_0 = NULL, data_1 = NULL){
  
  data_combined <- rbind(data_0[[i]], data_1[[i]])
  
  return(data_combined)
  
}

matched_all <- lapply(
  X = 1:female_imp$m, 
  FUN = combine_list, 
  data_0 = female_matched,
  data_1 = male_matched
  )

5.5.2 Weighting

weight_within_strata <- function(i, imputed_data = NULL, ps_formula = NULL){
  
  weighted <- WeightIt::weightit(
    formula = ps_formula, 
    data = mice::complete(imputed_data, i),
    method = "glm",
    estimand = "ATT"
    )
  
  # trim extreme weights
  weighted <- trim(
    x = weighted, 
    at = .95, 
    lower = TRUE
    )
  
  weighted_data <- mice::complete(imputed_data, i) |> 
    mutate(weights = weighted$weights)
  
  return(weighted_data)
  
}

female_weighted <- lapply(
  X = 1:female_imp$m, 
  FUN = weight_within_strata, 
  imputed_data = female_imp,
  ps_formula = ps_form
  )

male_weighted <- lapply(
  X = 1:male_imp$m, 
  FUN = weight_within_strata, 
  imputed_data = male_imp,
  ps_formula = ps_form
  )

weighted_all <- lapply(
  X = 1:female_imp$m, 
  FUN = combine_list, 
  data_0 = female_weighted,
  data_1 = male_weighted
  )

5.6 Outcome model comparisons

5.6.1 Matching

cox_fit_matching <- function(i){
  
  survival_fit <- survival::coxph(
    data = i,
    formula = Surv(fu_itt_months, death_itt) ~ treat*dem_sex_cont, 
    weights = weights, 
    cluster = subclass,
    robust = TRUE
    )
  
}
matched_all |> 
  lapply(FUN = cox_fit_matching) |> 
  mice::pool() |> 
  broom::tidy(exponentiate = TRUE, conf.int = TRUE) |> 
  dplyr::select(term, estimate, std.error, conf.low, conf.high)
                term  estimate  std.error  conf.low conf.high
1              treat 0.7782032 0.05666319 0.6954915 0.8707515
2       dem_sex_cont 0.7590698 0.07551533 0.6537746 0.8813235
3 treat:dem_sex_cont 1.0198702 0.10694806 0.8252015 1.2604621

5.6.2 Weighting

cox_fit_weighting <- function(i){
  
  survival_fit <- survival::coxph(
    data = i,
    formula = Surv(fu_itt_months, death_itt) ~ treat*dem_sex_cont, 
    weights = weights, 
    robust = TRUE
    )
  
}
weighted_all |> 
  lapply(FUN = cox_fit_weighting) |> 
  mice::pool() |> 
  broom::tidy(exponentiate = TRUE, conf.int = TRUE) |> 
  dplyr::select(term, estimate, std.error, conf.low, conf.high)
                term  estimate  std.error  conf.low conf.high
1              treat 0.7820201 0.04219183 0.7199227 0.8494737
2       dem_sex_cont 0.7585226 0.05375601 0.6824368 0.8430913
3 treat:dem_sex_cont 1.0100191 0.07844426 0.8659504 1.1780567

5.7 References

5.8 Session info

Script runtime: 0.95 minutes.

pander::pander(subset(data.frame(sessioninfo::package_info()), attached==TRUE, c(package, loadedversion)))
  package loadedversion
dplyr dplyr 1.1.4
furrr furrr 0.3.1
future future 1.34.0
gtsummary gtsummary 2.0.1
here here 1.0.1
MatchThem MatchThem 1.2.1
Matrix Matrix 1.7-0
mice mice 3.16.0
parallelly parallelly 1.38.0
ranger ranger 0.16.0
survey survey 4.4-2
survival survival 3.5-8
pander::pander(sessionInfo())

R version 4.4.0 (2024-04-24)

Platform: x86_64-pc-linux-gnu

locale: LC_CTYPE=C.UTF-8, LC_NUMERIC=C, LC_TIME=C.UTF-8, LC_COLLATE=C.UTF-8, LC_MONETARY=C.UTF-8, LC_MESSAGES=C.UTF-8, LC_PAPER=C.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=C.UTF-8 and LC_IDENTIFICATION=C

attached base packages: grid, stats, graphics, grDevices, datasets, utils, methods and base

other attached packages: furrr(v.0.3.1), future(v.1.34.0), ranger(v.0.16.0), parallelly(v.1.38.0), gtsummary(v.2.0.1), here(v.1.0.1), survey(v.4.4-2), Matrix(v.1.7-0), MatchThem(v.1.2.1), mice(v.3.16.0), survival(v.3.5-8) and dplyr(v.1.1.4)

loaded via a namespace (and not attached): gtable(v.0.3.5), shape(v.1.4.6.1), xfun(v.0.47), ggplot2(v.3.5.1), htmlwidgets(v.1.6.4), MatchIt(v.4.5.5), lattice(v.0.22-6), simsurv(v.1.0.0), vctrs(v.0.6.5), tools(v.4.4.0), generics(v.0.1.3), parallel(v.4.4.0), tibble(v.3.2.1), fansi(v.1.0.6), pan(v.1.9), pkgconfig(v.2.0.3), jomo(v.2.7-6), assertthat(v.0.2.1), lifecycle(v.1.0.4), stringr(v.1.5.1), compiler(v.4.4.0), tictoc(v.1.2.1), munsell(v.0.5.1), mitools(v.2.4), codetools(v.0.2-20), htmltools(v.0.5.8.1), yaml(v.2.3.10), glmnet(v.4.1-8), pillar(v.1.9.0), nloptr(v.2.1.1), crayon(v.1.5.3), tidyr(v.1.3.1), MASS(v.7.3-60.2), sessioninfo(v.1.2.2), iterators(v.1.0.14), rpart(v.4.1.23), boot(v.1.3-30), foreach(v.1.5.2), mitml(v.0.4-5), nlme(v.3.1-164), locfit(v.1.5-9.10), WeightIt(v.1.3.0), tidyselect(v.1.2.1), digest(v.0.6.37), stringi(v.1.8.4), pander(v.0.6.5), listenv(v.0.9.1), purrr(v.1.0.2), splines(v.4.4.0), rprojroot(v.2.0.4), fastmap(v.1.2.0), colorspace(v.2.1-1), cli(v.3.6.3), magrittr(v.2.0.3), utf8(v.1.2.4), broom(v.1.0.6), withr(v.3.0.1), scales(v.1.3.0), backports(v.1.5.0), rmarkdown(v.2.28), globals(v.0.16.3), nnet(v.7.3-19), lme4(v.1.1-35.5), chk(v.0.9.2), evaluate(v.0.24.0), knitr(v.1.48), rlang(v.1.1.4), Rcpp(v.1.0.13), glue(v.1.7.0), DBI(v.1.2.3), renv(v.1.0.7), minqa(v.1.2.8), jsonlite(v.1.8.8) and R6(v.2.5.1)

pander::pander(options('repos'))
  • repos:

    REPO_NAME
    https://packagemanager.posit.co/cran/linux/noble/latest